{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "0993b44c-58f3-4d7d-ac31-80871a867040",
   "metadata": {},
   "source": [
    "# AI Property Rental Assistant for Durham\n",
    "This notebook creates an intelligent property rental assistant that:\n",
    "1. Scrapes rental property listings from OnTheMarket.com\n",
    "2. Uses OpenAI's GPT-4o-mini to analyze and recommend properties based on user preferences\n",
    "3. Provides formatted recommendations in markdown for easy reading\n",
    "\n",
    "Purpose: Help students and professionals find suitable rental properties in Durham, UK"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6f3fa597-bac5-496f-b0c6-ac1cb524062d",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import requests\n",
    "from dotenv import load_dotenv\n",
    "from bs4 import BeautifulSoup\n",
    "from IPython.display import Markdown, display\n",
    "from openai import OpenAI"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dfa715c4-81d4-4f1e-87d8-6cf7fa17db71",
   "metadata": {},
   "outputs": [],
   "source": [
    "# =====================================\n",
    "# STEP 1: ENVIRONMENT SETUP & API KEYS\n",
    "# =====================================\n",
    "\n",
    "# Load environment variables from .env file\n",
    "# Make sure you have a .env file with: OPENAI_API_KEY=your_key_here\n",
    "# override=True lets values from .env replace variables already set in the environment\n",
    "load_dotenv(override=True)\n",
    "api_key = os.getenv('OPENAI_API_KEY')\n",
    "\n",
    "# Sanity-check the key and print a hint for the first problem found.\n",
    "# The whitespace check runs before the prefix check: a key with leading spaces or tabs\n",
    "# also fails the sk-proj- prefix test, and the whitespace hint is the accurate diagnosis.\n",
    "if not api_key:\n",
    "    print(\"No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!\")\n",
    "elif api_key.strip() != api_key:\n",
    "    print(\"An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook\")\n",
    "elif not api_key.startswith(\"sk-proj-\"):\n",
    "    print(\"An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook\")\n",
    "else:\n",
    "    print(\"API key found and looks good so far!\")\n",
    "\n",
    "# Initialize OpenAI client\n",
    "# Created without arguments, so it relies on OPENAI_API_KEY being present in the environment\n",
    "openai = OpenAI()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c7e44572-1cda-42d2-a6ff-45f462fd436f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# =====================================\n",
    "# STEP 2: WEB SCRAPING SETUP\n",
    "# =====================================\n",
    "\n",
    "# HTTP headers to mimic a real browser request\n",
    "# Many websites block requests without proper headers\n",
    "headers = {\n",
    "    \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
    "}\n",
    "\n",
    "class Website:\n",
    "    \"\"\"\n",
    "    Fetch a webpage and keep its title and visible body text.\n",
    "\n",
    "    Construction performs the HTTP request, parses the HTML with\n",
    "    BeautifulSoup, and strips script, style, img and input elements\n",
    "    before extracting text. Request failures are caught and reported\n",
    "    with print(); placeholder strings are stored instead of raising.\n",
    "\n",
    "    Attributes:\n",
    "        url (str): The URL that was requested\n",
    "        title (str): The page title, \"No title found\" if it is missing or\n",
    "            empty, or \"Error loading page\" if the request failed\n",
    "        text (str): Text of the page body, \"No body content found\" if the\n",
    "            page has no body, or \"Could not load page content\" if the\n",
    "            request failed\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, url):\n",
    "        \"\"\"\n",
    "        Scrape the given URL and populate url, title and text.\n",
    "\n",
    "        Args:\n",
    "            url (str): The website URL to scrape\n",
    "        \"\"\"\n",
    "        self.url = url\n",
    "        try:\n",
    "            # Timeout prevents the notebook from hanging on an unresponsive server\n",
    "            response = requests.get(url, headers=headers, timeout=10)\n",
    "            response.raise_for_status()  # Raises an HTTPError for bad responses\n",
    "\n",
    "            # Parse HTML content\n",
    "            soup = BeautifulSoup(response.content, 'html.parser')\n",
    "\n",
    "            # .string is None when <title> is empty or holds nested markup, so fall\n",
    "            # back to the placeholder rather than storing None as the title\n",
    "            page_title = soup.title.string if soup.title else None\n",
    "            self.title = page_title if page_title else \"No title found\"\n",
    "\n",
    "            if soup.body:\n",
    "                # Drop elements that carry no readable listing text\n",
    "                for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
    "                    irrelevant.decompose()\n",
    "\n",
    "                # One text fragment per line, with surrounding whitespace stripped\n",
    "                self.text = soup.body.get_text(separator=\"\\n\", strip=True)\n",
    "            else:\n",
    "                self.text = \"No body content found\"\n",
    "\n",
    "        except requests.RequestException as e:\n",
    "            # Network and HTTP errors are reported, not raised; callers can detect\n",
    "            # the failure by comparing against these placeholder strings\n",
    "            print(f\"Error fetching website: {e}\")\n",
    "            self.title = \"Error loading page\"\n",
    "            self.text = \"Could not load page content\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a97d9c34-2831-4730-949e-bba1b6ac9bb3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# =====================================\n",
    "# STEP 3: AI ASSISTANT FUNCTIONS\n",
    "# =====================================\n",
    "\n",
    "def house_renting(system_prompt, user_prompt, model=\"gpt-4o-mini\"):\n",
    "    \"\"\"\n",
    "    Send a system prompt and a user prompt to an OpenAI chat model.\n",
    "\n",
    "    Uses the module-level `openai` client to make a single chat\n",
    "    completion request and returns the text of the first choice.\n",
    "\n",
    "    Args:\n",
    "        system_prompt (str): Instructions for how the AI should behave\n",
    "        user_prompt (str): The user's specific request with property data\n",
    "        model (str): Chat model name; defaults to \"gpt-4o-mini\"\n",
    "\n",
    "    Returns:\n",
    "        str: The model's reply, or an empty string if the reply has no text content\n",
    "    \"\"\"\n",
    "    messages = [\n",
    "        {\"role\": \"system\", \"content\": system_prompt},\n",
    "        {\"role\": \"user\", \"content\": user_prompt}\n",
    "    ]\n",
    "\n",
    "    # Call OpenAI API\n",
    "    response = openai.chat.completions.create(\n",
    "        model=model,\n",
    "        messages=messages,\n",
    "    )\n",
    "\n",
    "    # content is optional in the API response; normalise None to \"\" so callers\n",
    "    # that render the result always receive a string\n",
    "    return response.choices[0].message.content or \"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6d0c4b96-b907-45ed-8a4d-a67d8f7e4f33",
   "metadata": {},
   "outputs": [],
   "source": [
    "# =====================================\n",
    "# STEP 4: AI SYSTEM CONFIGURATION\n",
    "# =====================================\n",
    "\n",
    "# Define how the AI assistant should behave\n",
    "# This is crucial for getting consistent, helpful responses\n",
    "system_prompt = \"\"\"\n",
    "You are a helpful real estate assistant specializing in UK property rentals. Your job is to guide users in finding houses to rent, especially in Durham. Follow these rules:\n",
    "\n",
    "1. Always ask clarifying questions if user input is vague. Determine location, budget, number of bedrooms, and tenant type (e.g. student, family, professional).\n",
    "2. Use structured data provided from the website (like property listings) to identify relevant options.\n",
    "3. If listings are provided, filter and rank them based on the user's preferences.\n",
    "4. Recommend up to 5 top properties with rent price, bedroom count, key features, and location.\n",
    "5. Always respond in markdown with clean formatting using headers, bold text, and bullet points.\n",
    "6. If no listings match well, provide tips (e.g. \"try adjusting your budget or search radius\").\n",
    "7. Stay concise, helpful, and adapt to whether the user is a student, family, couple, or solo tenant.\n",
    "\"\"\"\n",
    "\n",
    "def user_prompt_for_renting(website, user_needs):\n",
    "    \"\"\"\n",
    "    Create a formatted prompt that combines user requirements with scraped property data.\n",
    "    \n",
    "    This function:\n",
    "    - Takes user preferences and website content\n",
    "    - Formats them into a clear prompt for the AI\n",
    "    - Limits content to first 4000 characters to stay within token limits\n",
    "    \n",
    "    Args:\n",
    "        website (Website): The scraped website object\n",
    "        user_needs (str): Description of what the user is looking for\n",
    "        \n",
    "    Returns:\n",
    "        str: Formatted prompt ready to send to the AI\n",
    "    \"\"\"\n",
    "    user_prompt = f\"\"\"\n",
    "I want to rent a house and here's what I'm looking for:\n",
    "{user_needs}\n",
    "\n",
    "Here are the property listings I found on the website titled: \"{website.title}\".\n",
    "Please analyze them and recommend the best 3–5 options that match my needs. If none are suitable, tell me why and offer suggestions.\n",
    "\n",
    "The page content is below:\n",
    "{website.text[:4000]}  # Truncated to first 4000 characters to manage token usage\n",
    "\"\"\"\n",
    "    return user_prompt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cecb1f11-060a-4737-828c-e94ae04a42ae",
   "metadata": {},
   "outputs": [],
   "source": [
    "# =====================================\n",
    "# STEP 5: MAIN EXECUTION\n",
    "# =====================================\n",
    "\n",
    "if __name__ == \"__main__\":\n",
    "    print(\"Starting AI Property Rental Assistant...\")\n",
    "    print(\"=\" * 50)\n",
    "\n",
    "    # Configure the property search\n",
    "    website_url = \"https://www.onthemarket.com/to-rent/property/durham/\"\n",
    "    print(f\"🔍 Scraping properties from: {website_url}\")\n",
    "\n",
    "    # Scrape the website\n",
    "    website = Website(website_url)\n",
    "\n",
    "    # Website catches request errors and stores placeholder strings instead of\n",
    "    # raising, so compare against them to learn whether the scrape really worked\n",
    "    scrape_failed = (\n",
    "        website.title == \"Error loading page\"\n",
    "        and website.text == \"Could not load page content\"\n",
    "    )\n",
    "\n",
    "    # Display scraping results\n",
    "    print(f\"Website Title: {website.title}\")\n",
    "    print(f\"Content Length: {len(website.text)} characters\")\n",
    "\n",
    "    if scrape_failed:\n",
    "        # Without listing text the model has nothing to rank; skip the API call\n",
    "        print(\"Failed to scrape property listings - skipping AI recommendations\")\n",
    "    else:\n",
    "        print(\"Successfully scraped property listings\")\n",
    "        print()\n",
    "\n",
    "        # Define user requirements\n",
    "        # TODO: Make this interactive by adding input() statements\n",
    "        user_needs = \"I'm a student looking for a 2-bedroom house in Durham under £2,000/month\"\n",
    "        print(f\"User Requirements: {user_needs}\")\n",
    "        print()\n",
    "\n",
    "        # Generate AI prompt\n",
    "        user_prompt = user_prompt_for_renting(website, user_needs)\n",
    "\n",
    "        # Get AI recommendations\n",
    "        print(\"Generating AI recommendations...\")\n",
    "        output = house_renting(system_prompt, user_prompt)\n",
    "\n",
    "        # Display results\n",
    "        display(Markdown(output))\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:llms]",
   "language": "python",
   "name": "conda-env-llms-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
